><-i_like_snake_case)# is a commentInstall once per machine
install.packages("tidyverse") # yes in quotes
Load once per R work session
library(tidyverse) # no quotes
Overriding conflicts
select <- dplyr::select
Get help
?select
You can also get packages that are available on GitHub (may be development versions of what is on CRAN)
install.packages("devtools")
devtools::install_github("gadenbuie/ggpomological", build_vignette = TRUE)
library(ggpomological) # load like any other package
Recommended IDE.
Download open source desktop version: https://www.rstudio.com/products/rstudio/
Use Projects! https://support.rstudio.com/hc/en-us/articles/200526207-Using-Projects
What is it? https://www.tidyverse.org
Piping/chaining with the %>% operator
Problem:
Nesting your dataframe in commands is hard to read
head(iris)## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
Solution:
Piping your dataframe into a command lets you read left to right
iris %>% head(.)## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
Without the pipe, sequences of commands are read inside out
head(iris[iris$Species == "virginica", ])## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
Chaining your commands lets you read left to right
iris %>% filter(Species == "virginica") %>% head(.) ## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 6.3 3.3 6.0 2.5 virginica
## 2 5.8 2.7 5.1 1.9 virginica
## 3 7.1 3.0 5.9 2.1 virginica
## 4 6.3 2.9 5.6 1.8 virginica
## 5 6.5 3.0 5.8 2.2 virginica
## 6 7.6 3.0 6.6 2.1 virginica
The readr package is part of the tidyverse bundle.
We don’t need to do this, as it was loaded with the tidyverse package for us, but as a reminder…
install.packages("readr") # once per machine
library(readr) # once per work session
Read in from a url
Our data is stored in a gist: https://gist.github.com/kylebgorman/77ce12c9167554ade560af9d34565c11
I saved the raw url as a human-readable link.
mazes <- read_csv("http://bit.ly/mazes-gist")
Problem: File paths are hard
Solution: Use here::here
library(here)
mazes <- read_csv(here("data", "mazes.csv")) # from readr
glimpse(mazes) # from dplyr## Observations: 381
## Variables: 12
## $ Study.ID <chr> "CSLU-001", "CSLU-001", "CSLU-001", "CSLU-001", "CSLU...
## $ CA <dbl> 5.6667, 5.6667, 5.6667, 5.6667, 6.5000, 6.5000, 6.500...
## $ VIQ <dbl> 124, 124, 124, 124, 124, 124, 124, 124, 108, 108, 108...
## $ DX <chr> "TD", "TD", "TD", "TD", "TD", "TD", "TD", "TD", "TD",...
## $ Activity <chr> "Conversation", "Picture Description", "Play", "Wordl...
## $ Content <dbl> 24, 1, 21, 8, 3, 5, 8, 2, 25, 10, 2, 5, 32, 20, 13, 2...
## $ Filler <dbl> 31, 2, 6, 2, 10, 3, 8, 2, 21, 13, 10, 2, 12, 9, 4, 4,...
## $ REP <dbl> 2, 0, 3, 0, 3, 2, 3, 0, 4, 0, 0, 0, 13, 5, 5, 6, 10, ...
## $ REV <dbl> 5, 0, 8, 4, 0, 1, 2, 0, 4, 2, 1, 3, 8, 7, 2, 8, 5, 1,...
## $ FS <dbl> 17, 1, 10, 4, 0, 2, 3, 2, 17, 8, 1, 2, 11, 8, 6, 7, 1...
## $ Cued <dbl> 36, 2, 6, 2, 10, 3, 9, 2, 29, 13, 11, 2, 14, 12, 4, 1...
## $ Not.Cued <dbl> 50, 3, 27, 10, 13, 8, 15, 4, 38, 23, 11, 7, 42, 26, 1...
Problem: Variable types are hard
Solution: Use readr::read_csv(col_types)
mazes <- read_csv(here("data", "mazes.csv"),
col_types = cols(
DX = col_factor(levels = NULL),
Activity = col_factor(levels = NULL)
)
)
glimpse(mazes) # from dplyr## Observations: 381
## Variables: 12
## $ Study.ID <chr> "CSLU-001", "CSLU-001", "CSLU-001", "CSLU-001", "CSLU...
## $ CA <dbl> 5.6667, 5.6667, 5.6667, 5.6667, 6.5000, 6.5000, 6.500...
## $ VIQ <dbl> 124, 124, 124, 124, 124, 124, 124, 124, 108, 108, 108...
## $ DX <fct> TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, T...
## $ Activity <fct> Conversation, Picture Description, Play, Wordless Pic...
## $ Content <dbl> 24, 1, 21, 8, 3, 5, 8, 2, 25, 10, 2, 5, 32, 20, 13, 2...
## $ Filler <dbl> 31, 2, 6, 2, 10, 3, 8, 2, 21, 13, 10, 2, 12, 9, 4, 4,...
## $ REP <dbl> 2, 0, 3, 0, 3, 2, 3, 0, 4, 0, 0, 0, 13, 5, 5, 6, 10, ...
## $ REV <dbl> 5, 0, 8, 4, 0, 1, 2, 0, 4, 2, 1, 3, 8, 7, 2, 8, 5, 1,...
## $ FS <dbl> 17, 1, 10, 4, 0, 2, 3, 2, 17, 8, 1, 2, 11, 8, 6, 7, 1...
## $ Cued <dbl> 36, 2, 6, 2, 10, 3, 9, 2, 29, 13, 11, 2, 14, 12, 4, 1...
## $ Not.Cued <dbl> 50, 3, 27, 10, 13, 8, 15, 4, 38, 23, 11, 7, 42, 26, 1...
Problem: But I’m a visual person…
Solution: Use visdat::vis_dat()
library(visdat)
vis_dat(mazes)
Problem: Variable names are messy
Solution: Use janitor::clean_names()
library(janitor)
mazes <- mazes %>%
clean_names()
glimpse(mazes)## Observations: 381
## Variables: 12
## $ study_id <chr> "CSLU-001", "CSLU-001", "CSLU-001", "CSLU-001", "CSLU...
## $ ca <dbl> 5.6667, 5.6667, 5.6667, 5.6667, 6.5000, 6.5000, 6.500...
## $ viq <dbl> 124, 124, 124, 124, 124, 124, 124, 124, 108, 108, 108...
## $ dx <fct> TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, T...
## $ activity <fct> Conversation, Picture Description, Play, Wordless Pic...
## $ content <dbl> 24, 1, 21, 8, 3, 5, 8, 2, 25, 10, 2, 5, 32, 20, 13, 2...
## $ filler <dbl> 31, 2, 6, 2, 10, 3, 8, 2, 21, 13, 10, 2, 12, 9, 4, 4,...
## $ rep <dbl> 2, 0, 3, 0, 3, 2, 3, 0, 4, 0, 0, 0, 13, 5, 5, 6, 10, ...
## $ rev <dbl> 5, 0, 8, 4, 0, 1, 2, 0, 4, 2, 1, 3, 8, 7, 2, 8, 5, 1,...
## $ fs <dbl> 17, 1, 10, 4, 0, 2, 3, 2, 17, 8, 1, 2, 11, 8, 6, 7, 1...
## $ cued <dbl> 36, 2, 6, 2, 10, 3, 9, 2, 29, 13, 11, 2, 14, 12, 4, 1...
## $ not_cued <dbl> 50, 3, 27, 10, 13, 8, 15, 4, 38, 23, 11, 7, 42, 26, 1...
Problem: We want summary statistics for all variables with one line of code
Solution: Use skimr::skim()
library(skimr)
skim(mazes)## Skim summary statistics
## n obs: 381
## n variables: 12
##
## Variable type: character
## variable missing complete n min max empty n_unique
## 1 study_id 0 381 381 8 8 0 97
##
## Variable type: factor
## variable missing complete n n_unique
## 1 activity 0 381 381 4
## 2 dx 0 381 381 3
## top_counts ordered
## 1 Wor: 97, Pla: 96, Con: 94, Pic: 94 FALSE
## 2 ASD: 183, TD: 127, SLI: 71, NA: 0 FALSE
##
## Variable type: numeric
## variable missing complete n mean sd min p25 median p75
## 1 ca 0 381 381 6.83 1.06 4.75 6.08 6.83 7.58
## 2 content 0 381 381 18.73 24.84 0 5 12 23
## 3 cued 0 381 381 14.36 24.22 0 2 6 15
## 4 filler 0 381 381 11.2 17.59 0 2 5 13
## 5 fs 0 381 381 8.7 12.76 0 2 5 11
## 6 not_cued 0 381 381 26.77 31.73 1 8 17 33
## 7 rep 0 381 381 6.24 9.45 0 1 3 7
## 8 rev 0 381 381 3.79 4.31 0 1 3 5
## 9 viq 0 381 381 100.82 18.74 53 85 102 116
## max hist
## 1 8.92 ▅▆▇▇▇▇▆▂
## 2 214 ▇▁▁▁▁▁▁▁
## 3 230 ▇▁▁▁▁▁▁▁
## 4 152 ▇▁▁▁▁▁▁▁
## 5 118 ▇▁▁▁▁▁▁▁
## 6 222 ▇▂▁▁▁▁▁▁
## 7 87 ▇▁▁▁▁▁▁▁
## 8 37 ▇▂▁▁▁▁▁▁
## 9 143 ▁▁▇▇▆▅▅▂
Problem: We need a sanity check for our data
Solution: Count everything
mazes %>% tally() # number of rows## # A tibble: 1 x 1
## n
## <int>
## 1 381
mazes %>% distinct(study_id) %>% count() # overall n## # A tibble: 1 x 1
## n
## <int>
## 1 97
mazes %>%
summarize(n = n_distinct(study_id))## # A tibble: 1 x 1
## n
## <int>
## 1 97
mazes %>%
count(dx, activity) # n by group/activity## # A tibble: 12 x 3
## dx activity n
## <fct> <fct> <int>
## 1 TD Conversation 32
## 2 TD Picture Description 31
## 3 TD Play 32
## 4 TD Wordless Picture Book 32
## 5 ASD Conversation 45
## 6 ASD Picture Description 45
## 7 ASD Play 46
## 8 ASD Wordless Picture Book 47
## 9 SLI Conversation 17
## 10 SLI Picture Description 18
## 11 SLI Play 18
## 12 SLI Wordless Picture Book 18
# frequency tables
mazes_tab <- mazes %>%
tabyl(dx, activity) # from janitor package
mazes_tab## dx Conversation Picture Description Play Wordless Picture Book
## TD 32 31 32 32
## ASD 45 45 46 47
## SLI 17 18 18 18
mazes_tab %>%
adorn_percentages("col") %>%
adorn_pct_formatting() %>%
adorn_ns()## dx Conversation Picture Description Play Wordless Picture Book
## TD 34.0% (32) 33.0% (31) 33.3% (32) 33.0% (32)
## ASD 47.9% (45) 47.9% (45) 47.9% (46) 48.5% (47)
## SLI 18.1% (17) 19.1% (18) 18.8% (18) 18.6% (18)
Other packages for data import in R bundled in tidyverse:
- DBI, for databases.
- haven, for SPSS, SAS and Stata files.
- httr, for web APIs.
- jsonlite, for JSON.
- readxl, for .xls and .xlsx files.
- rvest, for web scraping.
- xml2, for XML.
If you can, I suggest trying to stay in the tidyverse of packages (at least to start). Recommendations:
- readr to import (or other tidyverse options above)
- readr to specify column types using the col_types argument
- visdat::vis_dat() to visualize column types
- here for file paths (hurrah!)
- janitor::clean_names() to clean up variable names
- skimr to skim() all the summary statistics in one line
- dplyr to count rows (yay for sanity checks)
Five main single-table verbs in dplyr:
- select: Choose variables/columns by their names
- filter: Pick rows based on conditions about their values
- summarize: Create summary measures of variables (or groups of observations on variables using group_by)
- mutate: Make a new variable in the data frame
- arrange: Sort the rows based on one or more variables
Combine with group_by()
The package tidyr has two primary functions for tidying data:
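- gather()
- spread()
3 rules of tidy data:
- Each variable must have its own column.
- Each observation must have its own row.
- Each value must have its own cell.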
Let’s work again with the mazes data.
It is actually already tidy. Why?
mazes## # A tibble: 381 x 12
## study_id ca viq dx activity content filler rep rev fs
## <chr> <dbl> <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CSLU-001 5.67 124 TD Conversati… 24.0 31.0 2.00 5.00 17.0
## 2 CSLU-001 5.67 124 TD Picture De… 1.00 2.00 0 0 1.00
## 3 CSLU-001 5.67 124 TD Play 21.0 6.00 3.00 8.00 10.0
## 4 CSLU-001 5.67 124 TD Wordless P… 8.00 2.00 0 4.00 4.00
## 5 CSLU-002 6.50 124 TD Conversati… 3.00 10.0 3.00 0 0
## 6 CSLU-002 6.50 124 TD Picture De… 5.00 3.00 2.00 1.00 2.00
## 7 CSLU-002 6.50 124 TD Play 8.00 8.00 3.00 2.00 3.00
## 8 CSLU-002 6.50 124 TD Wordless P… 2.00 2.00 0 0 2.00
## 9 CSLU-007 7.50 108 TD Conversati… 25.0 21.0 4.00 4.00 17.0
## 10 CSLU-007 7.50 108 TD Picture De… 10.0 13.0 0 2.00 8.00
## # ... with 371 more rows, and 2 more variables: cued <dbl>, not_cued <dbl>
glimpse(mazes)## Observations: 381
## Variables: 12
## $ study_id <chr> "CSLU-001", "CSLU-001", "CSLU-001", "CSLU-001", "CSLU...
## $ ca <dbl> 5.6667, 5.6667, 5.6667, 5.6667, 6.5000, 6.5000, 6.500...
## $ viq <dbl> 124, 124, 124, 124, 124, 124, 124, 124, 108, 108, 108...
## $ dx <fct> TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, T...
## $ activity <fct> Conversation, Picture Description, Play, Wordless Pic...
## $ content <dbl> 24, 1, 21, 8, 3, 5, 8, 2, 25, 10, 2, 5, 32, 20, 13, 2...
## $ filler <dbl> 31, 2, 6, 2, 10, 3, 8, 2, 21, 13, 10, 2, 12, 9, 4, 4,...
## $ rep <dbl> 2, 0, 3, 0, 3, 2, 3, 0, 4, 0, 0, 0, 13, 5, 5, 6, 10, ...
## $ rev <dbl> 5, 0, 8, 4, 0, 1, 2, 0, 4, 2, 1, 3, 8, 7, 2, 8, 5, 1,...
## $ fs <dbl> 17, 1, 10, 4, 0, 2, 3, 2, 17, 8, 1, 2, 11, 8, 6, 7, 1...
## $ cued <dbl> 36, 2, 6, 2, 10, 3, 9, 2, 29, 13, 11, 2, 14, 12, 4, 1...
## $ not_cued <dbl> 50, 3, 27, 10, 13, 8, 15, 4, 38, 23, 11, 7, 42, 26, 1...
It could have been seriously un-tidy…
## # A tibble: 2,667 x 7
## study_id ca viq dx activity variable_type maze_count
## <chr> <dbl> <dbl> <fct> <fct> <chr> <dbl>
## 1 CSLU-001 5.67 124 TD Conversation content 24.0
## 2 CSLU-001 5.67 124 TD Picture Description content 1.00
## 3 CSLU-001 5.67 124 TD Play content 21.0
## 4 CSLU-001 5.67 124 TD Wordless Picture B… content 8.00
## 5 CSLU-002 6.50 124 TD Conversation content 3.00
## 6 CSLU-002 6.50 124 TD Picture Description content 5.00
## 7 CSLU-002 6.50 124 TD Play content 8.00
## 8 CSLU-002 6.50 124 TD Wordless Picture B… content 2.00
## 9 CSLU-007 7.50 108 TD Conversation content 25.0
## 10 CSLU-007 7.50 108 TD Picture Description content 10.0
## # ... with 2,657 more rows
## Observations: 97
## Variables: 32
## $ study_id <chr> "CSLU-001", "CSLU-002", "CSLU...
## $ ca <dbl> 5.6667, 6.5000, 7.5000, 5.250...
## $ viq <dbl> 124, 124, 108, 112, 102, 102,...
## $ dx <fct> TD, TD, TD, TD, ASD, TD, ASD,...
## $ Conversation_content <dbl> 24, 3, 25, 32, 27, 60, 30, 27...
## $ Conversation_cued <dbl> 36, 10, 29, 14, 17, 27, 7, 24...
## $ Conversation_filler <dbl> 31, 10, 21, 12, 12, 23, 6, 18...
## $ Conversation_fs <dbl> 17, 0, 17, 11, 12, 21, 9, 16,...
## $ Conversation_not_cued <dbl> 50, 13, 38, 42, 34, 79, 35, 3...
## $ Conversation_rep <dbl> 2, 3, 4, 13, 10, 20, 17, 4, 5...
## $ Conversation_rev <dbl> 5, 0, 4, 8, 5, 19, 4, 7, 1, 1...
## $ `Picture Description_content` <dbl> 1, 5, 10, 20, 9, 36, 21, 4, N...
## $ `Picture Description_cued` <dbl> 2, 3, 13, 12, 7, 11, 2, 5, NA...
## $ `Picture Description_filler` <dbl> 2, 3, 13, 9, 6, 6, 2, 5, NA, ...
## $ `Picture Description_fs` <dbl> 1, 2, 8, 8, 3, 10, 11, 1, NA,...
## $ `Picture Description_not_cued` <dbl> 3, 8, 23, 26, 14, 37, 23, 9, ...
## $ `Picture Description_rep` <dbl> 0, 2, 0, 5, 5, 16, 8, 1, NA, ...
## $ `Picture Description_rev` <dbl> 0, 1, 2, 7, 1, 10, 2, 2, NA, ...
## $ Play_content <dbl> 21, 8, 2, 13, 12, 17, 54, 10,...
## $ Play_cued <dbl> 6, 9, 11, 4, 9, 5, 7, 12, 2, ...
## $ Play_filler <dbl> 6, 8, 10, 4, 7, 4, 5, 9, 2, 2...
## $ Play_fs <dbl> 10, 3, 1, 6, 3, 7, 17, 4, 3, ...
## $ Play_not_cued <dbl> 27, 15, 11, 17, 17, 20, 57, 1...
## $ Play_rep <dbl> 3, 3, 0, 5, 5, 3, 26, 1, 5, 4...
## $ Play_rev <dbl> 8, 2, 1, 2, 4, 7, 11, 5, 0, 5...
## $ `Wordless Picture Book_content` <dbl> 8, 2, 5, 21, 6, 14, 17, 8, 4,...
## $ `Wordless Picture Book_cued` <dbl> 2, 2, 2, 11, 4, 10, 0, 0, 3, ...
## $ `Wordless Picture Book_filler` <dbl> 2, 2, 2, 4, 3, 6, 0, 0, 2, 5,...
## $ `Wordless Picture Book_fs` <dbl> 4, 2, 2, 7, 2, 9, 6, 4, 0, 15...
## $ `Wordless Picture Book_not_cued` <dbl> 10, 4, 7, 18, 8, 16, 17, 8, 5...
## $ `Wordless Picture Book_rep` <dbl> 0, 0, 0, 6, 2, 3, 6, 2, 3, 4,...
## $ `Wordless Picture Book_rev` <dbl> 4, 0, 3, 8, 2, 2, 5, 2, 1, 4,...
How could we have fixed this?
mazes_step1 <- mazes_widest %>%
gather(variable_name, maze_count, -c(study_id, ca, viq, dx))
mazes_step1## # A tibble: 2,716 x 6
## study_id ca viq dx variable_name maze_count
## <chr> <dbl> <dbl> <fct> <chr> <dbl>
## 1 CSLU-001 5.67 124 TD Conversation_content 24.0
## 2 CSLU-002 6.50 124 TD Conversation_content 3.00
## 3 CSLU-007 7.50 108 TD Conversation_content 25.0
## 4 CSLU-010 5.25 112 TD Conversation_content 32.0
## 5 CSLU-020 8.25 102 ASD Conversation_content 27.0
## 6 CSLU-024 6.67 102 TD Conversation_content 60.0
## 7 CSLU-027 8.17 81.0 ASD Conversation_content 30.0
## 8 CSLU-031 5.67 143 TD Conversation_content 27.0
## 9 CSLU-036 6.17 77.0 ASD Conversation_content 7.00
## 10 CSLU-046 5.25 129 ASD Conversation_content 36.0
## # ... with 2,706 more rows
mazes_step2 <- mazes_step1 %>%
separate(variable_name, into = c("activity", "var_type"), sep = "_")
mazes_step2## # A tibble: 2,716 x 7
## study_id ca viq dx activity var_type maze_count
## <chr> <dbl> <dbl> <fct> <chr> <chr> <dbl>
## 1 CSLU-001 5.67 124 TD Conversation content 24.0
## 2 CSLU-002 6.50 124 TD Conversation content 3.00
## 3 CSLU-007 7.50 108 TD Conversation content 25.0
## 4 CSLU-010 5.25 112 TD Conversation content 32.0
## 5 CSLU-020 8.25 102 ASD Conversation content 27.0
## 6 CSLU-024 6.67 102 TD Conversation content 60.0
## 7 CSLU-027 8.17 81.0 ASD Conversation content 30.0
## 8 CSLU-031 5.67 143 TD Conversation content 27.0
## 9 CSLU-036 6.17 77.0 ASD Conversation content 7.00
## 10 CSLU-046 5.25 129 ASD Conversation content 36.0
## # ... with 2,706 more rows
mazes_step3 <- mazes_step2 %>%
spread(var_type, maze_count)
mazes_step3## # A tibble: 388 x 12
## study_id ca viq dx activity content cued filler fs not
## <chr> <dbl> <dbl> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CSLU-001 5.67 124 TD Conversati… 24.0 36.0 31.0 17.0 50.0
## 2 CSLU-001 5.67 124 TD Picture De… 1.00 2.00 2.00 1.00 3.00
## 3 CSLU-001 5.67 124 TD Play 21.0 6.00 6.00 10.0 27.0
## 4 CSLU-001 5.67 124 TD Wordless P… 8.00 2.00 2.00 4.00 10.0
## 5 CSLU-002 6.50 124 TD Conversati… 3.00 10.0 10.0 0 13.0
## 6 CSLU-002 6.50 124 TD Picture De… 5.00 3.00 3.00 2.00 8.00
## 7 CSLU-002 6.50 124 TD Play 8.00 9.00 8.00 3.00 15.0
## 8 CSLU-002 6.50 124 TD Wordless P… 2.00 2.00 2.00 2.00 4.00
## 9 CSLU-007 7.50 108 TD Conversati… 25.0 29.0 21.0 17.0 38.0
## 10 CSLU-007 7.50 108 TD Picture De… 10.0 13.0 13.0 8.00 23.0
## # ... with 378 more rows, and 2 more variables: rep <dbl>, rev <dbl>
Let’s do another one…
(plot <- ggplot(mazes, aes(x = content, y = filler)) +
geom_point())
(plot +
facet_wrap(~activity))
plot <- ggplot(mazes, aes(x = content, y = filler)) +
geom_point(alpha = .3) +
facet_wrap(~activity)
plot
plot <- ggplot(mazes, aes(x = content, y = filler, color = dx)) +
geom_point(alpha = .5) +
facet_wrap(~activity)
plot
plot <- ggplot(mazes, aes(x = content, y = filler)) +
geom_smooth(se = FALSE) +
geom_point(aes(color = content), shape = 10) +
facet_wrap(~activity)
plot
Challenge!
- In geom_smooth(), use a linear model (lm) as the smoothing method (function) to use.
- Change the points layer to geom_point(aes(color = dx), alpha = .5). What changes?
ggplot(mazes, aes(x = activity, y = content)) +
geom_boxplot()
ggplot(mazes, aes(x = activity, y = content)) +
geom_violin()
ggplot(mazes, aes(x = activity, y = content)) +
geom_violin(fill = "lightgray", color = NA) +
geom_boxplot(width = .1, colour = "slateblue", fill = "black")
ggplot(mazes, aes(content)) +
geom_histogram() +
facet_wrap(~activity)
ggplot(mazes, aes(content, fill = dx)) +
geom_density(alpha = .5) +
facet_wrap(~activity)